% scribe: Richard Crump
% lastupdate: 6 December 2005
% lecture: 14
% references: Durrett, Section 2.3
% title: Characteristic Functions: An Overview
% keywords: metric spaces, Levy metric, characteristic functions, Helly's selection theorem, tightness, determining class, uniqueness theorem, mixtures of characteristic functions, sums of independent random variables
% end

\documentclass[12pt, letterpaper]{article}
\include{macros}
\begin{document}
\lecture{14}{Characteristic Functions: An Overview}{Richard Crump}{crump@econ.berkeley.edu}

\section{A useful property of metric spaces}
% keywords: metric spaces, Levy metric
% end

Recall that a metric space is a set $S$ together with a real-valued function $d:S \times S \to \R$ such that for every $x,y,z \in S$,
\begin{enumerate}
\item $d(x,y) \geq 0$, with equality iff $x=y$
\item $d(x,y)=d(y,x)$
\item $d(x,z) \leq d(x,y) + d(y,z)$
\end{enumerate}

Now suppose $\left\{ x_n \right\}$ is a sequence in a metric space $\left(S,d\right)$ and there exists some $x \in S$ such that every subsequence of $\left\{x_n\right\}$ contains a further subsequence which converges to $x$. Then $x_n \rightarrow x$.

\begin{proof}
We will proceed by contradiction. Suppose $x_n \nrightarrow x$. Then there exists an $\varepsilon>0$ such that $d(x_n,x)>\varepsilon \,\,\, \mathrm{i.o.}$ (i.e., there is a subsequence $n(k)$ so that $d(x_{n(k)},x)>\varepsilon$ for all $k$). But this subsequence can have \underline{no} further subsequence which converges to $x$. This is a contradiction.
\end{proof}

Next notice that we can put a metric $d$ on the set $\mathcal{P}$ of probability measures on $\R$ so that $\P_n \dcv \P$ iff $d(\P_n,\P) \longrightarrow 0$ (i.e., $\P_n$ converges to $\P$ in the metric space $(\mathcal{P}, d)$). There are various ways to choose $d$. One example is the L\'evy metric,
\[ d_L(\P,\Q) = \inf\left\{\epsilon: \, \P(-\infty, x-\epsilon]- \epsilon \leq \Q(-\infty, x] \leq \P(-\infty, x+\epsilon]+ \epsilon \, \, \text{for all} \, x \right\}. \]
For another example, recall that the alternative definition of convergence in distribution --- $\int f \, d\P_n \to \int f \, d\P$ for all bounded, continuous functions $f$ --- can be simplified to holding only for the linear interpolation functions $f_{u,v}$ (where $u<v$ are the endpoints of the interpolation: $f_{u,v}$ equals $1$ to the left of $u$, $0$ to the right of $v$, and is linear in between). Moreover, it suffices to take the endpoints rational, so for $(u_n,v_n)$ an enumeration of the pairs of rationals with $u_n < v_n$ we may define
\[ d_R(\P,\Q) = \sum_{n=1}^\infty 2^{-n} \left| \int f_{u_n,v_n} \, d\P - \int f_{u_n,v_n} \, d\Q \right|. \]
Note that $d_R \leq 2$ since $\left| \int f_{u_n,v_n} \, d\P - \int f_{u_n,v_n} \, d\Q \right| \leq 2$ for every $n$.
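As a quick sanity check on the L\'evy metric (a direct computation that we will not need later), consider the point masses $\delta_0$ and $\delta_a$ for some $a>0$, where $\delta_c$ denotes the probability measure putting mass one at $c$. With $\P=\delta_0$ and $\Q=\delta_a$, the right-hand inequality in the definition holds for every $\epsilon>0$, while the left-hand inequality fails whenever $\epsilon < \min(a,1)$ (take any $x \in [\epsilon, a)$) and holds once $\epsilon \geq \min(a,1)$, so
\[ d_L(\delta_0,\delta_a) = \min(a,1). \]
In particular $d_L(\delta_0,\delta_a) \to 0$ as $a \to 0$, which matches the fact that $\delta_a \dcv \delta_0$; note also that $d_L$ is bounded by $1$, just as $d_R$ is bounded by $2$.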
\section{A useful technique}
% keywords: Helly's selection theorem, tightness, determining class
% end

The following application of Helly's Selection Theorem (\cite{durrett}, chapter 2, (2.5)) motivates characteristic functions, and transform methods in general:

\begin{theorem}
Suppose we have a collection $\mathcal C$ of bounded continuous functions $f:\R \to \R$, and a sequence of probability measures $\left\{\P_n\right\}$ such that,
\begin{enumerate}
\item $\mathcal C$ is a determining class, i.e.
\[ \int f \, d\P = \int f \, d\Q \quad \forall f\in \mathcal C \qquad \Rightarrow \qquad \P=\Q, \]
or in words, if two probability measures integrate every $f \in \mathcal C$ identically, then they are the same measure.
\item $\left\{\P_n\right\}$ is tight
\item $\int f \, d\P_n$ has a limit as $n \longrightarrow \infty$ for each $f \in \mathcal C$
\end{enumerate}
Then there exists a unique probability measure $\P$ on $\R$ so that $\P_n \dcv \P$, where $\P$ is determined by $\int f \, d\P = \lim_{n \rightarrow \infty} \int f \, d\P_n$ for all $f \in \mathcal C$ (and this convergence then holds for all bounded, continuous functions $f$).
\end{theorem}

\begin{proof}
We will use the fact about metric spaces from the first section: we will show that there exists some $\P$ so that every subsequence of $\left\{\P_n\right\}$ contains a further subsequence which converges to $\P$. How to get $\P$?
\begin{itemize}
\item[-] \underline{Helly's Selection Theorem:} Implies there exist a subsequence $n(k)$ and a measure $\P$ on $\left[ -\infty, \infty \right]$ so that $\P_{n(k)} \stackrel{\scriptstyle v}{\longrightarrow} \P$ (vague convergence).
\item[-] \underline{Tightness:} Implies $\P$ puts no mass at $\pm \infty$ (i.e., $\P\left(\R\right)=1$).
\end{itemize}
So $\P_{n(k)} \dcv \P$ in the usual sense, i.e.,
\begin{equation} \label{eq:first}
\int f \, d\P_{n(k)} \rightarrow \int f \, d\P \quad \forall \,\, \text{bounded, continuous functions } f.
\end{equation}
In particular, this holds for all $f$ in the determining class. Now choose any subsequence $\P_{n'(k)}$ and appeal to Helly's Selection Theorem and tightness again: there is a further subsequence $\P_{n''(k)}$ which converges in distribution to some probability measure $\Q$, i.e.,
\begin{equation} \label{eq:second}
\P_{n''(k)} \dcv \Q \,\, \Longleftrightarrow \,\, \int f \, d\P_{n''(k)} \longrightarrow \int f \, d\Q \quad \forall \,\, \text{bounded, continuous functions } f.
\end{equation}
Finally, we appeal to the third assumption of the theorem, that $\int f \, d\P_n$ has a limit for each $f \in \mathcal C$, together with equations (\ref{eq:first}) and (\ref{eq:second}), to conclude that
\[ \int f \, d\P = \int f \, d\Q \quad \forall f \in \mathcal C. \]
Since $\mathcal C$ is a determining class, this forces $\Q = \P$. Thus every subsequence of $\left\{\P_n\right\}$ has a further subsequence converging in distribution to the same limit $\P$, and our handy metric space fact gives $\P_n \dcv \P$.
\end{proof}

\section{Characteristic functions}
% keywords: characteristic functions, determining class, uniqueness theorem
% end

From the perspective of the previous section, we consider
\[ \mathcal C = \left\{ f(x) = \sin(tx) \,\, \text{for some} \,\, t \in \R \right\} \cup \left\{ f(x)=\cos(tx) \,\, \text{for some} \,\, t \in \R \right\}. \]
We will show that $\mathcal C$ is \underline{determining}. This is the uniqueness theorem for characteristic functions.

For a probability measure $\P$ on $\R$, its \underline{characteristic function} is defined to be the map $\varphi:\R \to \C$ given by
\begin{eqnarray*}
\varphi(t) &=& \int e^{ity} \, \P(dy) \\
&:=& \int \cos(ty) \, \P(dy) + i \int \sin(ty) \, \P(dy).
\end{eqnarray*}
The characteristic function extends the usual definition of the Fourier transform to measures. As such, the distribution function $F$ may be reconstructed from the characteristic function $\varphi$. It may be useful to recall the infinite series representation of $e^z$,
\[ e^z = 1 + z + \frac{z^2}{2!} + \frac{z^3}{3!} + \ldots \qquad z \in \C. \]
The uniqueness theorem is:
\[ \varphi_{\P}(t) = \varphi_{\Q}(t) \mbox{ $\forall t \in \R$} \Longleftrightarrow \P = \Q. \]
In other words, if two probability measures have the same characteristic function then they are identical.
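For example, the definition is easy to evaluate for simple discrete measures (these two small computations are only illustrations and are not used below): for the point mass $\delta_a$ at a point $a \in \R$ we get
\[ \varphi_{\delta_a}(t) = \int e^{ity} \, \delta_a(dy) = e^{ita}, \]
and for the fair coin-flip measure $\P = \frac{1}{2}\delta_{-1} + \frac{1}{2}\delta_{1}$,
\[ \varphi_{\P}(t) = \tfrac{1}{2} e^{-it} + \tfrac{1}{2} e^{it} = \cos(t). \]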
Before proceeding we will introduce some friendlier notation. For a random variable $X$ we define
\begin{eqnarray*}
\varphi_X(t) &=& \E \left[ e^{itX} \right] \\
&:=& \E \left[ \cos(tX) \right] + i \cdot \E \left[ \sin(tX) \right].
\end{eqnarray*}
Before proving the uniqueness theorem we will introduce a few properties of characteristic functions:

\subsection{Some properties of characteristic functions}
% keywords: characteristic functions, mixtures of characteristic functions, sums of independent random variables
% end

\begin{enumerate}
\item \label{chf:zero} $\varphi_X(0) =1$
\item \label{chf:prop1} $|\varphi_X(t)| = | \E e^{itX} | \leq \E | e^{itX}| = 1$
\item \label{chf:conts} $t\mapsto\varphi_X(t)$ is continuous. (Note that $\varphi_X$ is not necessarily differentiable.)
\item \label{chf:taylor} If $\E |X|^n <\infty$ for some $n \in \N$, then $\varphi_X$ has $n$ continuous derivatives and
\[ \varphi_X(t)=1+it\E X+\cdots+\frac{(it)^n}{n!}\E X^n+o(t^n),\]
as $t \to 0$.
\item \label{chf:affine} $ \varphi_{aX+b}(t) = \E \left[ e^{it(aX+b)} \right] = e^{itb} \cdot \E \left[ e^{iatX} \right] = e^{itb} \cdot \varphi_{X}(at) $
\item \label{chf:sums} \underline{Sums of independent random variables}: Suppose $X$ and $Y$ are independent. Then,
\[ \varphi_{X+Y}(t) = \varphi_{X}(t)\cdot\varphi_{Y}(t), \,\,\, t \in \R. \]
Characteristic functions transform convolutions of measures (sums of independent random variables) into multiplication of numbers in $\C$.
\item \label{chf:symmetry} $\varphi_{-X}(t)=\varphi_X(-t)=\overline{\varphi_X(t)}$\\
\item \label{chf:uniqueness} \underline{Uniqueness}: If $\varphi_X(t)=\varphi_Y(t)$ for all $t\in \R$, then $X\stackrel{d}{=}Y$.
\item \label{chf:mixtures} Mixtures of characteristic functions are characteristic functions: Let $\varphi_1,\varphi_2,\ldots,\varphi_n$ be the characteristic functions of the distributions $F_1,F_2,\ldots,F_n$. Then if $p_i \geq 0$ and $\sum_{i=1}^n p_i = 1$, $\sum_{i=1}^np_i\varphi_i(t)$ is the characteristic function of
\[F(x)=\sum_{i=1}^np_iF_i(x).\]
\end{enumerate}

Properties \ref{chf:zero}, \ref{chf:symmetry} and \ref{chf:mixtures} follow immediately from the definition. Property \ref{chf:taylor} is true since
\[ \frac{d}{dt}\varphi_X(t) = \lim_{h\downarrow 0} \E \left[\frac{e^{i(t+h)X} - e^{itX}}{h}\right] = \lim_{h\downarrow 0} \E \left( e^{itX} \left[\frac{e^{ihX} - 1}{h}\right] \right) = \E \left( iXe^{itX} \right) \]
by Dominated Convergence (the difference quotient is bounded in modulus by $|X|$, which is integrable by assumption). Iterating gives $\varphi_X^{(k)}(0) = \E (iX)^k$ whenever $\E |X|^k < \infty$, and property \ref{chf:taylor} is now just Taylor's theorem. Property \ref{chf:sums} follows from the independence of the random variables. Finally, Property \ref{chf:uniqueness} can be shown using Fej\'er's theorem in Fourier analysis.

\subsection{Some common distributions and their characteristic functions}

\begin{table}[h]
\centering
\begin{tabular}[c]{|c|c|c|c|}
\hline
{\bf Distribution} & {\bf Density} & {\bf Support} & {\bf Characteristic Function} \\
\hline
& & & \\
Standard Normal & $\frac{1}{\sqrt{2\pi}}e^{-\frac{x^2}{2}}$ & $\R$ & $e^{-\frac{t^2}{2}}$ \\
&&& \\
Standard Uniform & $1$ & $[0,1]$ & $\frac{e^{it}-1}{it}$ \\
&&& \\
Exponential & $e^{-x}$ & $(0, \infty)$ & $\frac{1}{1-it}$ \\
&&& \\
Double Exponential & $\frac{1}{2} e^{-|x|}$ & $\R$ & $\frac{1}{1+t^2}$ \\
&&& \\
Cauchy & $\frac{1}{\pi} \frac{1}{1+x^2}$ & $\R$ & $e^{-|t|}$ \\
&&& \\
Triangular & $1-|x|$ & $[-1,1]$ & $2 \cdot \frac{1-\cos(t)}{t^2}$ \\
&&& \\
\hline
\end{tabular}
\end{table}
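As a quick check of one row of the table (and an illustration of properties \ref{chf:symmetry} and \ref{chf:mixtures}), the exponential entry follows directly from the definition:
\[ \varphi(t) = \int_0^\infty e^{itx} e^{-x} \, dx = \left[ \frac{e^{(it-1)x}}{it-1} \right]_0^\infty = \frac{1}{1-it}, \]
since $|e^{(it-1)x}| = e^{-x} \to 0$ as $x \to \infty$. The double exponential is the equal-weight mixture of an exponential and the negative of an exponential, so by properties \ref{chf:symmetry} and \ref{chf:mixtures} its characteristic function is
\[ \frac{1}{2} \cdot \frac{1}{1-it} + \frac{1}{2} \cdot \frac{1}{1+it} = \frac{1}{1+t^2}, \]
in agreement with the table.

\bibliographystyle{plain}
\bibliography{../books.bib}
\end{document}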